
Commit 532e9fe

Make fixes to new chatbot features
1 parent 880894d commit 532e9fe

3 files changed: +174 -122 lines changed

llamafile/chatbot.cpp

Lines changed: 13 additions & 122 deletions
@@ -30,27 +30,13 @@
 #include "llama.cpp/llama.h"
 #include "llama.cpp/server/server.h"
 #include "llamafile/bestline.h"
+#include "llamafile/chatbot.h"
 #include "llamafile/compute.h"
 #include "llamafile/highlight.h"
 #include "llamafile/llama.h"
 #include "llamafile/llamafile.h"
 #include "llamafile/string.h"

-#define RESET "\e[0m"
-#define BOLD "\e[1m"
-#define FAINT "\e[2m"
-#define UNBOLD "\e[22m"
-#define RED "\e[31m"
-#define GREEN "\e[32m"
-#define MAGENTA "\e[35m"
-#define YELLOW "\e[33m"
-#define CYAN "\e[36m"
-#define UNFOREGROUND "\e[39m"
-#define BRIGHT_BLACK "\e[90m"
-#define BRIGHT_RED "\e[91m"
-#define BRIGHT_GREEN "\e[92m"
-#define CLEAR_FORWARD "\e[K"
-
 enum Role {
     ROLE_USER,
     ROLE_ASSISTANT,
@@ -188,111 +174,10 @@ static std::string describe_position(llama_pos pos) {
     }
     if (pos > 0)
         description = std::string("... ") + description;
-    return lf::collapse(description);
-}
-
-static void on_help(const std::vector<std::string> &args) {
-    if (args.size() == 1) {
-        fprintf(stderr, "\
-" BOLD "available commands" RESET "\n\
-/clear restart conversation\n\
-/context print context window usage\n\
-/dump [FILE] print or save context window to file\n\
-/exit end program\n\
-/help [COMMAND] show help\n\
-/manual [on|off] toggle manual role mode\n\
-/pop restore context window size\n\
-/push push context window size to stack\n\
-/stack prints context window stack\n\
-/stats print performance metrics\n\
-/undo erases last message in conversation\n\
-");
-    } else if (args[1] == "context") {
-        fprintf(stderr, "\
-usage: /context" RESET "\n\
-prints information about context window usage. this helps you know how\n\
-soon you're going to run out of tokens for the current conversation.\n\
-");
-    } else if (args[1] == "dump") {
-        fprintf(stderr, "\
-" BOLD "usage: /dump [FILE]" RESET "\n\
-dumps raw tokens for current conversation history. special tokens are\n\
-printed in the a model specific chat syntax. this is useful for seeing\n\
-specifically what data is being evaluated by the model. by default it\n\
-will be printed to the terminal. if a FILE argument is specified, then\n\
-the raw conversation history will be written to that filename.\n\
-");
-    } else if (args[1] == "exit") {
-        fprintf(stderr, "\
-" BOLD "usage: /exit" RESET "\n\
-this command will cause the process to exit. it is essentially the same\n\
-as typing ctrl-d which signals an eof condition. it also does the same\n\
-thing as typing ctrl-c when the >>> user input prompt is displayed.\n\
-");
-    } else if (args[1] == "manual") {
-        fprintf(stderr, "\
-" BOLD "usage: /manual [on|off]" RESET "\n\
-puts the chatbot in manual mode. this is useful if you want to inject\n\
-a response as the model rather than the user. it's also possible to add\n\
-additional system prompts to the conversation history. when the manual\n\
-mode is activated, a hint is displayed next to the '>>>' indicating\n\
-the current role, which can be 'user', 'assistant', or 'system'. if\n\
-enter is pressed on an empty line, then llamafile will cycle between\n\
-all three roles. when /manual is specified without an argument, it will\n\
-toggle manual mode. otherwise an 'on' or 'off' argument is supplied.\n\
-");
-    } else if (args[1] == "help") {
-        fprintf(stderr, "\
-" BOLD "usage: /help [COMMAND]" RESET "\n\
-shows help on how to issue commands to your llamafile. if no argument is\n\
-specified, then a synopsis of all available commands will be printed. if\n\
-a specific command name is given (e.g. /help dump) then documentation on\n\
-the usage of that specific command will be printed.\n\
-");
-    } else if (args[1] == "stats") {
-        fprintf(stderr, "\
-" BOLD "usage: /stats" RESET "\n\
-prints performance statistics for current session. this includes prompt\n\
-evaluation time in tokens per second, which indicates prefill speed, or\n\
-how quickly llamafile is able to read text. the 'eval time' statistic\n\
-gives you prediction or token generation speed, in tokens per second,\n\
-which tells you how quickly llamafile is able to write text.\n\
-");
-    } else if (args[1] == "clear") {
-        fprintf(stderr, "\
-usage: /clear" RESET "\n\
-start conversation over from the beginning. this command adjusts the\n\
-context window to what it was after the initial system prompt. this\n\
-command also erases the /push stack.\n\
-");
-    } else if (args[1] == "push") {
-        fprintf(stderr, "\
-usage: /push" RESET "\n\
-save current size of context window to stack. this command may be used\n\
-with /pop to backtrack a conversation.\n\
-");
-    } else if (args[1] == "pop") {
-        fprintf(stderr, "\
-usage: /pop" RESET "\n\
-restores size of context window from stack. this command may be used\n\
-with /push to backtrack a conversation.\n\
-");
-    } else if (args[1] == "stack") {
-        fprintf(stderr, "\
-usage: /stack" RESET "\n\
-prints the current conversation stack, created by /push commands.\n\
-the stack consists of token offsets within the context window.\n\
-");
-    } else if (args[1] == "undo") {
-        fprintf(stderr, "\
-usage: /undo" RESET "\n\
-erases last exchange in conversation. in the normal mode, this includes\n\
-what the assistant last said, as well as the question that was asked. in\n\
-manual mode, this will erase only the last chat message.\n\
-");
-    } else {
-        fprintf(stderr, BRIGHT_RED "%s: unknown command" RESET "\n", args[1].c_str());
-    }
+    description = lf::collapse(description);
+    if (!pos && description.empty())
+        description = "<absolute beginning>";
+    return description;
 }

 static void on_manual(const std::vector<std::string> &args) {
@@ -323,6 +208,7 @@ static void on_clear(const std::vector<std::string> &args) {
     llama_kv_cache_seq_rm(g_ctx, 0, g_system_prompt_tokens, tokens_used() - g_system_prompt_tokens);
     g_history.resize(g_system_prompt_tokens);
     g_stack.clear();
+    fix_stacks();
 }

 static void print_stack(void) {
@@ -426,6 +312,11 @@ static void on_completion(const char *line, int pos, bestlineCompletions *comp)

 // handle irc style commands like: `/arg0 arg1 arg2`
 static bool handle_command(const char *command) {
+    if (!strcmp(command, "/?")) {
+        const std::vector<std::string> args = {"?"};
+        on_help(args);
+        return true;
+    }
     if (!(command[0] == '/' && std::isalpha(command[1])))
         return false;
     std::vector<std::string> args;
@@ -435,7 +326,7 @@ static bool handle_command(const char *command) {
         args.push_back(arg);
     if (args[0] == "exit" || args[0] == "bye") {
         exit(0);
-    } else if (args[0] == "help" || args[0] == "?") {
+    } else if (args[0] == "help") {
         on_help(args);
     } else if (args[0] == "stats") {
         on_stats(args);
@@ -671,11 +562,11 @@ int chatbot_main(int argc, char **argv) {
             free(line);
             continue;
         }
+        g_said_something = true;
         if (handle_command(line)) {
             free(line);
             continue;
         }
-        g_said_something = true;
         bool add_assi = !g_manual_mode;
         std::vector<llama_chat_msg> chat = {{get_role_name(g_role), line}};
         std::string msg = llama_chat_apply_template(g_model, params.chat_template, chat, add_assi);
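One detail worth calling out in the handle_command() hunks above: the generic parser only accepts commands whose first character after the slash is alphabetic, so the old `args[0] == "?"` branch appears to have been unreachable; the new strcmp() fast path is what actually lets /? act as an alias for /help. A minimal standalone sketch of that gatekeeping logic (an illustration, not llamafile code):

// Standalone sketch: shows why "/?" needs its own check ahead of the generic
// parser, which requires an alphabetic character right after the slash.
#include <cctype>
#include <cstdio>
#include <cstring>

static bool looks_like_command(const char *command) {
    if (!std::strcmp(command, "/?"))
        return true; // special-cased because isalpha('?') is false
    return command[0] == '/' && std::isalpha((unsigned char)command[1]);
}

int main() {
    std::printf("/?    -> %d\n", looks_like_command("/?"));    // 1 only with the fast path
    std::printf("/help -> %d\n", looks_like_command("/help")); // 1
    std::printf("/:-)  -> %d\n", looks_like_command("/:-)"));  // 0, treated as chat text
}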

llamafile/chatbot.h

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
+//
+// Copyright 2024 Mozilla Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <string>
+#include <vector>
+
+#define RESET "\e[0m"
+#define BOLD "\e[1m"
+#define FAINT "\e[2m"
+#define UNBOLD "\e[22m"
+#define RED "\e[31m"
+#define GREEN "\e[32m"
+#define MAGENTA "\e[35m"
+#define YELLOW "\e[33m"
+#define CYAN "\e[36m"
+#define UNFOREGROUND "\e[39m"
+#define BRIGHT_BLACK "\e[90m"
+#define BRIGHT_RED "\e[91m"
+#define BRIGHT_GREEN "\e[92m"
+#define CLEAR_FORWARD "\e[K"
+
+void on_help(const std::vector<std::string> &);
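The macros above are ordinary ANSI SGR escape sequences (note that \e in a string literal is a GCC/Clang extension; the portable spelling is \033). A hypothetical consumer, shown only to illustrate how the shared header is meant to be used now that the definitions live in one place:

// Illustrative only: any translation unit can now pull the color macros and
// the on_help() declaration from the shared header.
#include <cstdio>

#include "llamafile/chatbot.h"

int main() {
    std::fprintf(stderr, BOLD "available commands" RESET "\n");
    std::fprintf(stderr, BRIGHT_RED "%s: unknown command" RESET "\n", "/frobnicate"); // arbitrary example argument
}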

llamafile/chatbot_help.cpp

Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
+// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
+// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
+//
+// Copyright 2024 Mozilla Foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "chatbot.h"
+
+void on_help(const std::vector<std::string> &args) {
+    if (args.size() == 1) {
+        fprintf(stderr, "\
+" BOLD "available commands" RESET "\n\
+ctrl-j insert line in multi-line mode\n\
+\"\"\" use triple quotes for multi-line input\n\
+/clear restart conversation\n\
+/context print context window usage\n\
+/dump [FILE] print or save context window to file\n\
+/exit end program\n\
+/help [COMMAND] show help\n\
+/manual [on|off] toggle manual role mode\n\
+/pop restore context window size\n\
+/push push context window size to stack\n\
+/stack prints context window stack\n\
+/stats print performance metrics\n\
+/undo erases last message in conversation\n\
+");
+    } else if (args[1] == "context") {
+        fprintf(stderr, "\
+usage: /context" RESET "\n\
+prints information about context window usage. this helps you know how\n\
+soon you're going to run out of tokens for the current conversation.\n\
+");
+    } else if (args[1] == "dump") {
+        fprintf(stderr, "\
+" BOLD "usage: /dump [FILE]" RESET "\n\
+dumps raw tokens for current conversation history. special tokens are\n\
+printed in the a model specific chat syntax. this is useful for seeing\n\
+specifically what data is being evaluated by the model. by default it\n\
+will be printed to the terminal. if a FILE argument is specified, then\n\
+the raw conversation history will be written to that filename.\n\
+");
+    } else if (args[1] == "exit") {
+        fprintf(stderr, "\
+" BOLD "usage: /exit" RESET "\n\
+this command will cause the process to exit. it is essentially the same\n\
+as typing ctrl-d which signals an eof condition. it also does the same\n\
+thing as typing ctrl-c when the >>> user input prompt is displayed.\n\
+");
+    } else if (args[1] == "manual") {
+        fprintf(stderr, "\
+" BOLD "usage: /manual [on|off]" RESET "\n\
+puts the chatbot in manual mode. this is useful if you want to inject\n\
+a response as the model rather than the user. it's also possible to add\n\
+additional system prompts to the conversation history. when the manual\n\
+mode is activated, a hint is displayed next to the '>>>' indicating\n\
+the current role, which can be 'user', 'assistant', or 'system'. if\n\
+enter is pressed on an empty line, then llamafile will cycle between\n\
+all three roles. when /manual is specified without an argument, it will\n\
+toggle manual mode. otherwise an 'on' or 'off' argument is supplied.\n\
+");
+    } else if (args[1] == "help") {
+        fprintf(stderr, "\
+" BOLD "usage: /help [COMMAND]" RESET "\n\
+shows help on how to issue commands to your llamafile. if no argument is\n\
+specified, then a synopsis of all available commands will be printed. if\n\
+a specific command name is given (e.g. /help dump) then documentation on\n\
+the usage of that specific command will be printed.\n\
+");
+    } else if (args[1] == "stats") {
+        fprintf(stderr, "\
+" BOLD "usage: /stats" RESET "\n\
+prints performance statistics for current session. this includes prompt\n\
+evaluation time in tokens per second, which indicates prefill speed, or\n\
+how quickly llamafile is able to read text. the 'eval time' statistic\n\
+gives you prediction or token generation speed, in tokens per second,\n\
+which tells you how quickly llamafile is able to write text.\n\
+");
+    } else if (args[1] == "clear") {
+        fprintf(stderr, "\
+usage: /clear" RESET "\n\
+start conversation over from the beginning. this command adjusts the\n\
+context window to what it was after the initial system prompt. this\n\
+command also erases the /push stack.\n\
+");
+    } else if (args[1] == "push") {
+        fprintf(stderr, "\
+usage: /push" RESET "\n\
+save current size of context window to stack. this command may be used\n\
+with /pop to backtrack a conversation.\n\
+");
+    } else if (args[1] == "pop") {
+        fprintf(stderr, "\
+usage: /pop" RESET "\n\
+restores size of context window from stack. this command may be used\n\
+with /push to backtrack a conversation.\n\
+");
+    } else if (args[1] == "stack") {
+        fprintf(stderr, "\
+usage: /stack" RESET "\n\
+prints the current conversation stack, created by /push commands.\n\
+the stack consists of token offsets within the context window.\n\
+");
+    } else if (args[1] == "undo") {
+        fprintf(stderr, "\
+usage: /undo" RESET "\n\
+erases last exchange in conversation. in the normal mode, this includes\n\
+what the assistant last said, as well as the question that was asked. in\n\
+manual mode, this will erase only the last chat message.\n\
+");
+    } else {
+        fprintf(stderr, BRIGHT_RED "%s: unknown command" RESET "\n", args[1].c_str());
+    }
+}
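A hypothetical driver for the relocated helper, assuming it is compiled and linked together with chatbot_help.cpp and the repo's include paths: a one-element args vector prints the synopsis, and a second element selects a single command's documentation.

// Sketch only: exercises on_help() the same way handle_command() does.
#include "llamafile/chatbot.h"

int main() {
    on_help({"help"});           // one element: synopsis of all commands
    on_help({"help", "manual"}); // second element: detailed help for /manual
    on_help({"help", "bogus"});  // unknown topic: prints "bogus: unknown command"
}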
